import pandas as pd

# Source workbook to clean (relative path, resolved against the current working directory).
excel_file_path = '../A7_excel_data/A7-新闻文本分类算法.xlsx'
# Destination workbook that receives the cleaned worksheets.
cleaned_excel_file_path = '../A7_excel_data/A7-新闻文本分类算法_cleaned.xlsx'

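# Single-sheet workbook variant (disabled; kept for reference only).
# NOTE: this variant writes to a bare filename in the current directory,
# not to cleaned_excel_file_path.
# Read the file
# df = pd.read_excel(excel_file_path)
# # Use the first column as the column to check
# column_to_check = df.columns[0]
#
# # Drop rows whose value in that column is empty
# df_cleaned = df.dropna(subset = [column_to_check])
#
# # Save the cleaned data back to an Excel file
# df_cleaned.to_excel('A7-新闻文本分类算法_cleaned.xlsx', index=False)
#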

# Multi-sheet workbook: drop every row whose first-column cell is empty,
# sheet by sheet, and write all cleaned sheets into one output workbook
# under their original sheet names.
with pd.ExcelFile(excel_file_path) as xls:
    with pd.ExcelWriter(cleaned_excel_file_path) as writer:
        # Walk the worksheets in workbook order.
        for sheet_name in xls.sheet_names:
            # Load the current worksheet (first row is used as the header).
            df = pd.read_excel(xls, sheet_name=sheet_name)

            if df.columns.empty:
                # A completely empty worksheet parses to a DataFrame with no
                # columns, so df.columns[0] would raise IndexError and abort
                # the whole run. Copy such sheets through unchanged instead.
                df_cleaned = df
            else:
                # The first column decides whether a row is kept.
                column_to_check = df.columns[0]
                df_cleaned = df.dropna(subset=[column_to_check])

            # index=False keeps the pandas row index out of the output sheet.
            df_cleaned.to_excel(writer, sheet_name=sheet_name, index=False)

# Report completion and where the cleaned workbook was saved.
print(f'清洗完成，文件已保存为：{cleaned_excel_file_path}')